#stats to account for difference between treatments #Script: #1. Biomass second cut
#Data exploration
#assumptions test
#wilcox test for treatment #kruskal test for varieties #posthoc tests
#GLM
#boxcox
#2. Biomass first cut
#Data exploration
#assumptions test #wilcox test for treatment
#kruskal test for varieties
#posthoc tests
summary(biomass$Cut_2)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0040 0.5172 1.2165 1.6751 2.3182 9.3890
boxplot(biomass$Cut_2~biomass$Variety, ylab="Biomass [g]", main="Biomass of second cut")
boxplot(biomass$Cut_2~biomass$Treatment, ylab="Biomass [g]",main="Biomass of second cut")
hist(biomass$Cut_2, main="Biomass of second cut")
You can also embed plots, for example:
#t.test
#normality
# test for whole data
biomass_all=biomass$Cut_2
qqnorm(biomass_all)
qqline(biomass_all)
shapiro.test(biomass_all) #p value = <2.2*10^-16
##
## Shapiro-Wilk normality test
##
## data: biomass_all
## W = 0.80181, p-value < 2.2e-16
#data is not normally distributed
#try to transform the dependent variable
#log transformation
biomass_log=log(biomass_all)
qqnorm(biomass_log)
qqline(biomass_log)
shapiro.test(biomass_log) #1.210^-9
##
## Shapiro-Wilk normality test
##
## data: biomass_log
## W = 0.94679, p-value = 1.2e-09
#square root transformation
biomass_sqrt=sqrt(biomass_all)
qqnorm(biomass_sqrt)
qqline(biomass_sqrt)
shapiro.test(biomass_sqrt) #p value 6.59*10^-7
##
## Shapiro-Wilk normality test
##
## data: biomass_sqrt
## W = 0.96705, p-value = 6.59e-07
#independence
#equality of variance
#as assumption of normality is not met use mann whitney test
wilcox.test(biomass$Cut_2 ~ biomass$Treatment) #p-value = 2.179e-14
##
## Wilcoxon rank sum test with continuity correction
##
## data: biomass$Cut_2 by biomass$Treatment
## W = 7310, p-value = 2.179e-14
## alternative hypothesis: true location shift is not equal to 0
#do kurskal wallis test for Varieties
kruskal.test(biomass$Cut_2~biomass$Variety) #p-value = 2.056e-12
##
## Kruskal-Wallis rank sum test
##
## data: biomass$Cut_2 by biomass$Variety
## Kruskal-Wallis chi-squared = 83.865, df = 13, p-value = 2.056e-12
#Calculate pairwise multiple comparisons between group levels.
#These tests are sometimes referred to as Nemenyi-tests for multiple
#comparisons of (mean) rank sums of independent samples
#--> not appropriate for unequal samples
# posthoc.kruskal.nemenyi.test(x=biomass$Cut_2, g=biomass$Variety, dist="Chisquare")
#linear model with interaction
sum.lm=summary(lm(formula=biomass$Cut_2~biomass$Treatment*biomass$Variety))
lm=lm(formula=biomass$Cut_2~biomass$Treatment*biomass$Variety)
plot(lm)
## hat values (leverages) are all = 0.08333333
## and there are no factor predictors; no plot no. 5
#lm without interaction
summary(lm(formula=biomass$Cut_2~biomass$Treatment+biomass$Variety))
##
## Call:
## lm(formula = biomass$Cut_2 ~ biomass$Treatment + biomass$Variety)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.7466 -0.7401 -0.1296 0.5248 6.9977
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.69425 0.27567 2.518 0.012276 *
## biomass$TreatmenteCO2 + 2°C 1.30508 0.14236 9.168 < 2e-16 ***
## biomass$VarietyAbergain 0.06967 0.37664 0.185 0.853371
## biomass$VarietyAspect 0.26962 0.37664 0.716 0.474594
## biomass$VarietyCarraig 2.13962 0.37664 5.681 3.01e-08 ***
## biomass$VarietyDunluce -0.17004 0.37664 -0.451 0.651957
## biomass$VarietyLilora 1.28329 0.37664 3.407 0.000740 ***
## biomass$VarietyMoy -0.30142 0.37664 -0.800 0.424143
## biomass$VarietySemi-natural11 -0.49279 0.37664 -1.308 0.191679
## biomass$VarietySemi-natural6 0.96692 0.37664 2.567 0.010705 *
## biomass$VarietySemi-natural7 -0.60625 0.37664 -1.610 0.108464
## biomass$VarietySolomon 1.44342 0.37664 3.832 0.000153 ***
## biomass$VarietyWild4 -0.21463 0.37664 -0.570 0.569185
## biomass$VarietyWild6 0.27700 0.37664 0.735 0.462604
## biomass$VarietyWild7 -0.06742 0.37664 -0.179 0.858055
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.305 on 321 degrees of freedom
## Multiple R-squared: 0.3912, Adjusted R-squared: 0.3646
## F-statistic: 14.73 on 14 and 321 DF, p-value: < 2.2e-16
lm.add=lm(formula=biomass$Cut_2~biomass$Treatment+biomass$Variety)
plot(lm)
## hat values (leverages) are all = 0.08333333
## and there are no factor predictors; no plot no. 5
glm=glm(biomass$Cut_2~biomass$Treatment*biomass$Variety)
plot(glm)
## hat values (leverages) are all = 0.08333333
## and there are no factor predictors; no plot no. 5
#try boxcox
lm.biom=lm(biomass$Cut_2~biomass$Variety*biomass$Treatment)
plot(lm.biom)
## hat values (leverages) are all = 0.08333333
## and there are no factor predictors; no plot no. 5
bx.bio=boxcox(lm.biom)
lm.biom.bx=lm((biomass$Cut_2*0.4)~biomass$Variety*biomass$Treatment)
biomass2_log=log(biomass$Cut_2)
shapiro.test(biomass2_log)
##
## Shapiro-Wilk normality test
##
## data: biomass2_log
## W = 0.94679, p-value = 1.2e-09
qqnorm(biomass2_log)
qqline(biomass2_log)
#boxcox return 0.3 -> try log transformation
biomass2_test=biomass$Cut_2^0.5
shapiro.test(biomass2_test)
##
## Shapiro-Wilk normality test
##
## data: biomass2_test
## W = 0.96705, p-value = 6.59e-07
qqnorm(biomass2_test)
qqline(biomass2_test)
biomass2_rezi=biomass$Cut_2*-1
shapiro.test(biomass2_rezi)
##
## Shapiro-Wilk normality test
##
## data: biomass2_rezi
## W = 0.80181, p-value < 2.2e-16
summary(biomass$Cut_1)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00040 0.01675 0.04000 0.07429 0.10425 0.50600
boxplot(biomass$Cut_1~biomass$Variety, ylab="Biomass [g]", main="Biomass of first cut")
boxplot(biomass$Cut_1~biomass$Treatment, ylab="Biomass [g]",main="Biomass of first cut")
hist(biomass$Cut_1, main="Biomass of first cut")
Assumptions test and significance tests
#Assumptions test
#normality
# test for whole data
qqnorm(biomass$Cut_1)
qqline(biomass$Cut_1)
shapiro.test(biomass$Cut_1) #p value = <2.2*10^-16
##
## Shapiro-Wilk normality test
##
## data: biomass$Cut_1
## W = 0.76989, p-value < 2.2e-16
#data is not normally distributed
#try to transform the dependent variable
#square root transformation
biomass_sqrt=sqrt(biomass$Cut_1)
qqnorm(biomass_sqrt)
qqline(biomass_sqrt)
shapiro.test(biomass_sqrt) #p value 4.1*10^-7
##
## Shapiro-Wilk normality test
##
## data: biomass_sqrt
## W = 0.94589, p-value = 9.433e-10
#log doesn't work because of zeros in data
#reciproce transformation
biomass_rezi=biomass$Cut_1*(-1)
qqnorm(biomass_rezi)
qqline(biomass_rezi)
shapiro.test(biomass_rezi) #p value: 2.2*10^-16
##
## Shapiro-Wilk normality test
##
## data: biomass_rezi
## W = 0.76989, p-value < 2.2e-16
#boxcox transformation
lm.biom1=lm(biomass$Cut_1~biomass$Treatment)
plot(lm.biom1)
## hat values (leverages) are all = 0.005952381
## and there are no factor predictors; no plot no. 5
boxcox(lm.biom1)
#independence
#equality of variance
#as assumption of normality is not met use wilcox test instead
wilcox.test(biomass$Cut_1~biomass$Treatment) #p-value = 9.145e-16
##
## Wilcoxon rank sum test with continuity correction
##
## data: biomass$Cut_1 by biomass$Treatment
## W = 6956, p-value = 9.145e-16
## alternative hypothesis: true location shift is not equal to 0
#do kurskal wallis test for Varieties
kruskal.test(biomass$Cut_1~biomass$Variety) #
##
## Kruskal-Wallis rank sum test
##
## data: biomass$Cut_1 by biomass$Variety
## Kruskal-Wallis chi-squared = 92.63, df = 13, p-value = 4.38e-14
#Calculate pairwise multiple comparisons between group levels.
#These tests are sometimes referred to as Nemenyi-tests for multiple
#comparisons of (mean) rank sums of independent samples
#--> not appropriate for unequal samples
# posthoc.kruskal.nemenyi.test(x=biomass$Cut_1, g=biomass$Variety, dist="Chisquare")
#GLM
plot(glm(biomass$Cut_1~biomass$Variety*biomass$Treatment))
## hat values (leverages) are all = 0.08333333
## and there are no factor predictors; no plot no. 5
summary(glm(biomass$Cut_1~biomass$Variety*biomass$Treatment))
##
## Call:
## glm(formula = biomass$Cut_1 ~ biomass$Variety * biomass$Treatment)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.20075 -0.03316 -0.00825 0.02162 0.32167
##
## Coefficients:
## Estimate Std. Error
## (Intercept) 0.0459167 0.0188848
## biomass$VarietyAbergain -0.0146667 0.0267072
## biomass$VarietyAspect 0.0004167 0.0267072
## biomass$VarietyCarraig 0.0543333 0.0267072
## biomass$VarietyDunluce -0.0176833 0.0267072
## biomass$VarietyLilora 0.0131667 0.0267072
## biomass$VarietyMoy -0.0341667 0.0267072
## biomass$VarietySemi-natural11 -0.0317500 0.0267072
## biomass$VarietySemi-natural6 0.0061667 0.0267072
## biomass$VarietySemi-natural7 -0.0374167 0.0267072
## biomass$VarietySolomon 0.0185833 0.0267072
## biomass$VarietyWild4 -0.0104167 0.0267072
## biomass$VarietyWild6 -0.0215000 0.0267072
## biomass$VarietyWild7 -0.0183833 0.0267072
## biomass$TreatmenteCO2 + 2°C 0.0550333 0.0267072
## biomass$VarietyAbergain:biomass$TreatmenteCO2 + 2°C -0.0023667 0.0377696
## biomass$VarietyAspect:biomass$TreatmenteCO2 + 2°C -0.0004500 0.0377696
## biomass$VarietyCarraig:biomass$TreatmenteCO2 + 2°C 0.1104667 0.0377696
## biomass$VarietyDunluce:biomass$TreatmenteCO2 + 2°C 0.0040667 0.0377696
## biomass$VarietyLilora:biomass$TreatmenteCO2 + 2°C 0.0422167 0.0377696
## biomass$VarietyMoy:biomass$TreatmenteCO2 + 2°C -0.0126750 0.0377696
## biomass$VarietySemi-natural11:biomass$TreatmenteCO2 + 2°C -0.0148667 0.0377696
## biomass$VarietySemi-natural6:biomass$TreatmenteCO2 + 2°C 0.0474667 0.0377696
## biomass$VarietySemi-natural7:biomass$TreatmenteCO2 + 2°C -0.0322000 0.0377696
## biomass$VarietySolomon:biomass$TreatmenteCO2 + 2°C 0.0688000 0.0377696
## biomass$VarietyWild4:biomass$TreatmenteCO2 + 2°C -0.0129500 0.0377696
## biomass$VarietyWild6:biomass$TreatmenteCO2 + 2°C 0.0184667 0.0377696
## biomass$VarietyWild7:biomass$TreatmenteCO2 + 2°C -0.0054000 0.0377696
## t value Pr(>|t|)
## (Intercept) 2.431 0.0156 *
## biomass$VarietyAbergain -0.549 0.5833
## biomass$VarietyAspect 0.016 0.9876
## biomass$VarietyCarraig 2.034 0.0428 *
## biomass$VarietyDunluce -0.662 0.5084
## biomass$VarietyLilora 0.493 0.6224
## biomass$VarietyMoy -1.279 0.2018
## biomass$VarietySemi-natural11 -1.189 0.2354
## biomass$VarietySemi-natural6 0.231 0.8175
## biomass$VarietySemi-natural7 -1.401 0.1622
## biomass$VarietySolomon 0.696 0.4871
## biomass$VarietyWild4 -0.390 0.6968
## biomass$VarietyWild6 -0.805 0.4214
## biomass$VarietyWild7 -0.688 0.4918
## biomass$TreatmenteCO2 + 2°C 2.061 0.0402 *
## biomass$VarietyAbergain:biomass$TreatmenteCO2 + 2°C -0.063 0.9501
## biomass$VarietyAspect:biomass$TreatmenteCO2 + 2°C -0.012 0.9905
## biomass$VarietyCarraig:biomass$TreatmenteCO2 + 2°C 2.925 0.0037 **
## biomass$VarietyDunluce:biomass$TreatmenteCO2 + 2°C 0.108 0.9143
## biomass$VarietyLilora:biomass$TreatmenteCO2 + 2°C 1.118 0.2645
## biomass$VarietyMoy:biomass$TreatmenteCO2 + 2°C -0.336 0.7374
## biomass$VarietySemi-natural11:biomass$TreatmenteCO2 + 2°C -0.394 0.6941
## biomass$VarietySemi-natural6:biomass$TreatmenteCO2 + 2°C 1.257 0.2098
## biomass$VarietySemi-natural7:biomass$TreatmenteCO2 + 2°C -0.853 0.3946
## biomass$VarietySolomon:biomass$TreatmenteCO2 + 2°C 1.822 0.0695 .
## biomass$VarietyWild4:biomass$TreatmenteCO2 + 2°C -0.343 0.7319
## biomass$VarietyWild6:biomass$TreatmenteCO2 + 2°C 0.489 0.6252
## biomass$VarietyWild7:biomass$TreatmenteCO2 + 2°C -0.143 0.8864
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 0.004279637)
##
## Null deviance: 2.4322 on 335 degrees of freedom
## Residual deviance: 1.3181 on 308 degrees of freedom
## AIC: -850.22
##
## Number of Fisher Scoring iterations: 2